In [2]:
import matplotlib
import Bio.SeqIO
import Bio.Restriction
import scipy
import matplotlib.pyplot as plt
%matplotlib inline
import math
import random
import numpy as np

In [235]:
# standard deviation with Bionano data
def signmaS(len):
    singma = (0.2*0.2) + len * 0.1 * (-0.1) + len*len*0.04*0.04
    return(singma)

In [236]:
handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = [len(seq) for seq in Bio.Restriction.XhoI.catalyse(ref_seq)]

In [237]:
def subseq(seq, limits):
    x,y = limits
    if x < y : return seq[x:y]
    return seq[x:len(seq)] + seq[0:y]

In [238]:
molecules = []
for i in range(200):
    breaks = []
    num_breaks = 10 #random.randint(20,50)
    for j in range(num_breaks):
        breaks.append(random.randint(0,len(ref_seq) - 1))
    breaks.sort()        
    pairs = [(x,y) for x,y in zip(breaks[:-1], breaks[1:])] + [(breaks[-1], breaks[0])]
    molecules += [[subseq(ref_seq, pair), pair] for pair in pairs]

In [251]:
def sim(mol):
    "Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"
    error_locations = []
    sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]
    e_f_sites = list(sites);
    
    for i, site in enumerate(sites):
        if (random.randint(1,5) == 5):
            sites.pop(i)
            error_locations.append(i+1)                    
            
    allsites = [0] + sites + [len(mol)]
    e_f_allsites = [0] + e_f_sites + [len(mol)]
    # TODO add random break sites
    sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]
    print(sizes)
    print(sum(sizes))
    e_f_sizes = [y - x for x,y in zip(e_f_allsites[:-1], e_f_allsites[1:])]
    sizes = [size/1000.0 for size in sizes] # convert to kb
    e_f_sizes = [size/1000.0 for size in e_f_sizes] # convert to kb    
    sizes = [size + random.gauss(0, math.sqrt(signmaS(size))) for size in sizes]
    sizes = [size for size in sizes if size > .5]
    e_f_sizes = [size for size in e_f_sizes if size > .5]
    return [sizes] + [e_f_sizes] + [error_locations]

In [230]:
m = open("sim_single_molecule_longer_200","w")
mwe = open("sim_single_molecule_without_error_longer_200","w")

In [231]:
start = 0
end = 0
length_of_original = []
length_of_errored = []
accum = 0
for i, mol in enumerate((molecule for molecule in molecules if len(molecule[0]) > 250000)):
    simulated = sim(mol[0])  
    accum += sum(simulated[1])
    lens = [str(round(frag,3)) for frag in simulated[0]]
    if len(lens) < 10: continue
    map_name = "map_" + str(i) + "\n"
    
    lens_e_free = [str(round(frag,3)) for frag in simulated[1]]
    length_of_errored.append(len(lens))
    length_of_original.append(len(lens_e_free))
    #if i == 100: break

    if(start == 0 and end ==0):
        start = mol[1][0]
        end = mol[1][1]
    elif(start <= mol[1][0] and mol[1][0] >= end):
        m.write(map_name)
        m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")
        m.write("\n")
        
        mwe.write(map_name)
        mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
        mwe.write("\n")
    elif(start <= mol[1][1] and mol[1][0] >= end):
        m.write("map_" + str(i) + "\n")
        m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")        
        m.write("\n")
        
        mwe.write(map_name)
        mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
        mwe.write("\n")
m.close()
mwe.close()

In [232]:
def nextRange(cur_range, bin_size, s_varience):
    if(cur_range < 2000):
        return(cur_range+ bin_size)
    else:
        return(round(((cur_range/1000.0) + 4 * math.sqrt((cur_range/1000.0) * s_varience))*1000))

quantizeList = []
def quantizeNew(val, bin_size, s_varience):    
    if(val == 0):
        return(0)
    
    if(len(quantizeList) == 0):
        quantizeList.append(0.0)
        quantizeList.append(nextRange(0.0, bin_size, s_varience))
        
    for i in range(len(quantizeList)):
        if(val < quantizeList[i]):
            return(quantizeList[i-1])
    while(1):
        quantizeList.append(nextRange(quantizeList[len(quantizeList) - 1], bin_size, s_varience))
        if(val < quantizeList[len(quantizeList) - 1]):
            return(quantizeList[len(quantizeList) - 2])

In [217]:
def findNoOfFragments(fname):
    lengths = []
    lines = [line.strip() for i,line in enumerate(open(fname)) if (i%3==1)]       
    for line in lines:
         lengths.append((len(line.split("\t"))) - 2)
    count = 0.0
    for length in lengths:
        count += length
    print(count/len(lines))
    plt.hist(lengths, bins = 50)

In [218]:
findNoOfFragments("/s/oak/b/nobackup/muggli/goat/whole_genome_mapping/goat_whole_genome.maps")


25.4556773106

In [166]:
findNoOfFragments("sim_single_molecule")


25.4556773106
13.2268538833

In [219]:
findNoOfFragments("sim_single_molecule_longer")


23.3318181818

In [233]:
findNoOfFragments("sim_single_molecule_longer_200")


22.4797843666

------------- With BioNano deletion error rate --------------


In [573]:
# standard deviation with Bionano data
def signmaS(len):
    singma = (0.2*0.2) + len * 0.1 * (-0.1) + len*len*0.04*0.04
    return(singma)

handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = [len(seq) for seq in Bio.Restriction.XhoI.catalyse(ref_seq)]

def subseq(seq, limits):
    x,y = limits
    if x < y : return seq[x:y]
    return seq[x:len(seq)] + seq[0:y]

molecules = []
for i in range(400):
    breaks = []
    num_breaks = 10 #random.randint(20,50)
    for j in range(num_breaks):
        breaks.append(random.randint(0,len(ref_seq) - 1))
    breaks.sort()        
    pairs = [(x,y) for x,y in zip(breaks[:-1], breaks[1:])] + [(breaks[-1], breaks[0])]
    molecules += [[subseq(ref_seq, pair), pair] for pair in pairs]

In [574]:
def generateRandomList(listv, count):
    randomList = []
    randomListIndex = []
    index = range(0, len(listv))
    for c in range(0, count):               
        i = random.choice(index)
        randomList.append(listv[i])
        randomListIndex.append(i)
        index.remove(i)
    randomListIndex, randomList = zip(*sorted(zip(randomListIndex,randomList)))
    return(randomListIndex, randomList)

def deleteRandomly(listv, count):
    deleted = generateRandomList(listv, count)
    deleted_index = list(deleted[0])
    deleted_values = list(deleted[1])    
    for d in deleted_values:
        listv.remove(d)
    return(listv, deleted_index, deleted_values)

def sim(mol):
    "Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"    
    sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]
    e_f_sites = list(sites);
    
    sites, dindex, dsites = deleteRandomly( sites, int(len(mol)/100000))    
            
    allsites = [0] + sites + [len(mol)]
    e_f_allsites = [0] + e_f_sites + [len(mol)]    
    # TODO add random break sites
    sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]
    e_f_sizes = [y - x for x,y in zip(e_f_allsites[:-1], e_f_allsites[1:])]
    sizes = [size/1000.0 for size in sizes] # convert to kb
    e_f_sizes = [size/1000.0 for size in e_f_sizes] # convert to kb    
    sizes = [size + random.gauss(0, math.sqrt(signmaS(size))) for size in sizes if size > 0.5]
    sizes = [size for size in sizes if size > 0.5]
    e_f_sizes = [size for size in e_f_sizes if size > 0.5]
    return (sizes, e_f_sizes, dindex)

In [575]:
filename = "sim_single_molecule_100"
m = open(filename+"_newDel","w")
mwe = open(filename+"_efree","w")
efile  = open(filename+"_elocations","w")

start = 0
end = 0
length_of_original = []
length_of_errored = []
accum = 0
for i, mol in enumerate((molecule for molecule in molecules if len(molecule[0]) > 250000)):
    simulated = sim(mol[0])  
    accum += sum(simulated[1])
    lens = [str(round(frag,3)) for frag in simulated[0]]
    if len(lens) < 10: continue
    map_name = "map_" + str(i) + "\n"
    
    lens_e_free = [str(round(frag,3)) for frag in simulated[1]]
    length_of_errored.append(len(lens))
    length_of_original.append(len(lens_e_free))
    #if i == 100: break

    if(start == 0 and end ==0):
        start = mol[1][0]
        end = mol[1][1]
    
#    elif(start <= mol[1][0] and mol[1][0] >= end):
    if((start <= mol[1][0] and mol[1][0] <= end) or (start <= mol[1][1] and mol[1][1] <= end) or (start >= mol[1][0] and mol[1][1] >= end)):
        m.write(map_name)
        m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")
        m.write("\n")
        
        mwe.write(map_name)
        mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
        mwe.write("\n")
                
        efile.write(' '.join([str(s) for s in simulated[2]]))
        efile.write("\n")
"""        
    elif(start <= mol[1][1] and mol[1][0] >= end):
        m.write("map_" + str(i) + "\n")
        m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")        
        m.write("\n")
        
        mwe.write(map_name)
        mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
        mwe.write("\n")
                
        efile.write(' '.join([str(s) for s in simulated[2]]))
        efile.write("\n")
"""
m.close()
mwe.close()
efile.close()

------------- With BioNano deletion error rate (V2) --------------


In [208]:
# standard deviation with Bionano data
def signmaS(len):
    singma = (0.2*0.2) + len * 0.1 * (-0.1) + len*len*0.04*0.04
    return(singma)

handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = [len(seq) for seq in Bio.Restriction.XhoI.catalyse(ref_seq)]

def subseq(seq, limits):
    x,y = limits
    if x < y : return seq[x:y]
    return seq[x:len(seq)] + seq[0:y]

molecules = []
for i in range(500):
    breaks = []
    num_breaks = 10 #random.randint(20,50)
    for j in range(num_breaks):
        breaks.append(random.randint(0,len(ref_seq) - 1))
    breaks.sort()        
    pairs = [(x,y) for x,y in zip(breaks[:-1], breaks[1:])] + [(breaks[-1], breaks[0])]
    molecules += [[subseq(ref_seq, pair), pair] for pair in pairs]

In [209]:
def generateRandomList(listv, count):
    randomList = []
    randomListIndex = []
    if(count >= len(listv) + 1):
        return(randomListIndex, randomList)
    if(count <= 0):
        return(randomListIndex, randomList)
    if(len(listv) == 0):
        return(randomListIndex, randomList)
    
    index = range(0, len(listv))
    for c in range(0, count):
        i = random.choice(index)
        randomList.append(listv[i])
        randomListIndex.append(i)
        index.remove(i)
    
    randomListIndex, randomList = zip(*sorted(zip(randomListIndex,randomList)))
    return(randomListIndex, randomList)

def deleteRandomly(listv, count):
    deleted = generateRandomList(listv, count)
    deleted_index = list(deleted[0])
    deleted_values = list(deleted[1])    
    for d in deleted_values:
        listv.remove(d)
    return(listv, deleted_index, deleted_values)

def insertVal(A, val):
    idx = A.index(val)
    if(idx == 0):
        A.insert(idx, random.randint(0, A[idx]))
    else:
        A.insert(idx, random.randint((A[idx - 1 ]), A[idx]))
        
def addfalsepositive(listv, count):
    if(count == 0):
        return(listv, count)
    err_cites,sel_val = generateRandomList(listv, count)
    for val in sel_val:
        insertVal(listv, val)
    return(listv, err_cites, count)

def sim(mol):
    "Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"    
    sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]
    e_f_sites = list(sites);    
    sites, dindex, dsites = deleteRandomly( sites, int(round(len(sites)/6.66)))    
    sites, iindex, ins_err = addfalsepositive(sites, int(round(len(mol)/400000.0)))
    allsites = [0] + sites + [len(mol)]
    e_f_allsites = [0] + e_f_sites + [len(mol)]    
    # TODO add random break sites
    sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]
    e_f_sizes = [y - x for x,y in zip(e_f_allsites[:-1], e_f_allsites[1:])]
    sizes = [size/1000.0 for size in sizes] # convert to kb
    e_f_sizes = [size/1000.0 for size in e_f_sizes] # convert to kb
    
    sizes = [size + random.gauss(0, math.sqrt(signmaS(size))) for size in sizes if size > 0.5]
    sizes = [size for size in sizes if size > 0.5]
    e_f_sizes = [size for size in e_f_sizes if size > 0.5]
    return (sizes, e_f_sizes, dindex, ins_err, iindex)

In [211]:
filename = "sim_single_molecule_100"
m = open(filename+"_newDel","w")
mwe = open(filename+"_efree","w")
efile  = open(filename+"_elocations","w")

start = 0
end = 0
length_of_original = []
length_of_errored = []
accum = 0
tot_ins_err = 0
tot_del_err = 0
for i, mol in enumerate((molecule for molecule in molecules if len(molecule[0]) > 250000)):
    simulated = sim(mol[0])      
    lens = [str(round(frag,3)) for frag in simulated[0]]
    if len(lens) < 10: continue
    map_name = "map_" + str(i) + "\n"
    
    lens_e_free = [str(round(frag,3)) for frag in simulated[1]]
    length_of_errored.append(len(lens))
    length_of_original.append(len(lens_e_free))
    #if i == 100: break
    tot_ins_err += simulated[3]
    tot_del_err += len(simulated[2])
    
    if(start == 0 and end ==0):
        start = mol[1][0]
        end = mol[1][1]
    
#    elif(start <= mol[1][0] and mol[1][0] >= end):
#    if((start <= mol[1][0] and mol[1][0] <= end) or (start <= mol[1][1] and mol[1][1] <= end) or (start >= mol[1][0] and mol[1][1] >= end)):
    m.write(map_name)
    m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")
    m.write("\n")

    mwe.write(map_name)
    mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
    mwe.write("\n")

    efile.write(','.join([str(s) for s in simulated[2]]) + ' : ' + ','.join([str(s) for s in simulated[4]]))
    efile.write("\n")
"""        
    elif(start <= mol[1][1] and mol[1][0] >= end):
        m.write("map_" + str(i) + "\n")
        m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")        
        m.write("\n")
        
        mwe.write(map_name)
        mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
        mwe.write("\n")
                
        efile.write(' '.join([str(s) for s in simulated[2]]))
        efile.write("\n")
"""
m.close()
mwe.close()
efile.close()
print("Total insertion errors: " + str(tot_ins_err))
print("Total deletion errors : "+ str(tot_del_err))


Total insertion errors: 5119
Total deletion errors : 11929

------------- With BioNano deletion error rate (V3) --------------


In [82]:
# standard deviation with Bionano data
def signmaS(len):
    singma = (0.2*0.2) + len * 0.1 * (-0.1) + len*len*0.04*0.04
    return(singma)

handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = [len(seq) for seq in Bio.Restriction.XhoI.catalyse(ref_seq)]

def subseq(seq, limits):
    x,y = limits
    if x < y : return seq[x:y]
    return seq[x:len(seq)] + seq[0:y]

molecules = []
for i in range(500):
    breaks = []
    num_breaks = 10 #random.randint(20,50)
    for j in range(num_breaks):
        breaks.append(random.randint(0,len(ref_seq) - 1))
    breaks.sort()        
    pairs = [(x,y) for x,y in zip(breaks[:-1], breaks[1:])] + [(breaks[-1], breaks[0])]
    molecules += [[subseq(ref_seq, pair), pair] for pair in pairs]

In [83]:
def generateRandomList(listv, count):
    randomList = []
    randomListIndex = []
    if(count >= len(listv)):
        return(randomListIndex, randomList)
    if(count <= 0):
        return(randomListIndex, randomList)
    if(len(listv) == 0):
        return(randomListIndex, randomList)
    
    index = range(0, len(listv) - 1)
    for c in range(0, count):
        i = random.choice(index)
        randomList.append(listv[i])
        randomListIndex.append(i)
        index.remove(i)
    
    randomListIndex, randomList = zip(*sorted(zip(randomListIndex,randomList)))
    return(randomListIndex, randomList)

def deleteRandomly(listv, count):
    deleted = generateRandomList(listv, count)
    deleted_index = list(deleted[0])
    deleted_values = list(deleted[1])
    for i in reversed(deleted_index):        
        listv[i+1] = listv[i+1] + listv[i]
        listv.remove(listv[i])
    return(listv, deleted_index, deleted_values)

def insertVal(A, val):
    idx = A.index(val)        
    r = round(random.uniform(0.0, val), 3)
    A[idx] = r
    A.insert(idx, round((val - r), 3))
        
def addfalsepositive(listv, count):
    if(count == 0):
        return(listv, count)
    err_cites, sel_val = generateRandomList(listv, count)
    for val in reversed(sel_val):
        insertVal(listv, val)
    return(listv, err_cites, count)

def sim(mol):
    "Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"    
    sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]    
    allsites = [0] + sites + [len(mol)]
    sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]
    sizes = [size/1000.0 for size in sizes] # convert to kb
    sizes = [size for size in sizes if size > 1.1]
    if(bool(random.getrandbits(1)) == True):
        sizes = sizes[::-1]
    sizes = sizes[1:-1]
    e_f_sizes = list(sizes);
    sizes, dindex, dsites = deleteRandomly( sizes, int(round(len(sites)/6.66)))    
    sizes, iindex, ins_err = addfalsepositive(sizes, int(round(len(mol)/400000.0)))
    sizes = [size + random.gauss(0, math.sqrt(signmaS(size))) for size in sizes if size > 0.5]
    sizes = [size for size in sizes if size > 0.5]    
    return (sizes, e_f_sizes, dindex, ins_err, iindex)

In [84]:
filename = "sim_single_molecule_100"
m = open(filename+"_newDel","w")
mwe = open(filename+"_efree","w")
efile  = open(filename+"_elocations","w")

start = 0
end = 0
length_of_original = []
length_of_errored = []
accum = 0
tot_ins_err = 0
tot_del_err = 0
for i, mol in enumerate((molecule for molecule in molecules if len(molecule[0]) > 250000)):
    simulated = sim(mol[0])      
    lens = [str(round(frag,3)) for frag in simulated[0]]
    if len(lens) < 10: continue
    map_name = "map_" + str(i) + "\n"
    
    lens_e_free = [str(round(frag,3)) for frag in simulated[1]]
    length_of_errored.append(len(lens))
    length_of_original.append(len(lens_e_free))
    #if i == 100: break
    tot_ins_err += simulated[3]
    tot_del_err += len(simulated[2])
    
    if(start == 0 and end ==0):
        start = mol[1][0]
        end = mol[1][1]
    
#    elif(start <= mol[1][0] and mol[1][0] >= end):
#    if((start <= mol[1][0] and mol[1][0] <= end) or (start <= mol[1][1] and mol[1][1] <= end) or (start >= mol[1][0] and mol[1][1] >= end)):
    m.write(map_name)
    m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")
    m.write("\n")

    mwe.write(map_name)
    mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
    mwe.write("\n")

    efile.write(','.join([str(s) for s in simulated[2]]) + ' : ' + ','.join([str(s) for s in simulated[4]]))
    efile.write("\n")

m.close()
mwe.close()
efile.close()
print("Total insertion errors: " + str(tot_ins_err))
print("Total deletion errors : "+ str(tot_del_err))


Total insertion errors: 4803
Total deletion errors : 11303

In [85]:
!cp /s/oak/b/nobackup/darshanw/ipython-notebooks/sim_single_molecule_100_newDel /s/oak/b/nobackup/darshanw/COmap/test/

In [86]:
!cp /s/oak/b/nobackup/darshanw/ipython-notebooks/sim_single_molecule_100_efree /s/oak/b/nobackup/darshanw/COmap/test/

In [87]:
!cp /s/oak/b/nobackup/darshanw/ipython-notebooks/sim_single_molecule_100_elocations /s/oak/b/nobackup/darshanw/COmap/test/

In [117]:
A = [20, 40 ,55, 78, 90, 11]
index = []
for i in range(0, len(A)):
    if(A[i] <= 50):
        index.append(i)
index


Out[117]:
[0, 1, 5]

In [167]:
for i in range(0, 10000):
    if((1.1 + random.gauss(0, math.sqrt(signmaS(1.1)))) < 0.5):
        print("poop")


poop

In [195]:
A = [40.0, 23, 31, 39.9, 50, 62]
def removeLessthan(A, no):
    removedindex = []
    removedvalues = [] 
    for i in range(0, len(A)):
        if(A[i] < no):
            removedindex.append(i)
            removedvalues.append(A[i])
    for v in removedvalues:
        A.remove(v)
        
    return(A, removedindex)

a,_ = removeLessthan(A, 30)

In [273]:
def deleteRandomly(listv, count):
    D = generateRandomList(listv, count)
    deleted_index = list(D[0])
    deleted_values = list(D[1])
    for i in deleted_index:
        if(i == len(listv)):
            return(listv, [], [])
        else:
            listv
            listv.remove(d)
            
    return(listv, deleted_index, deleted_values)

In [328]:
def insertVal(A, val):
    idx = A.index(val)        
    r = round(random.uniform(0.0, val), 3)
    A[idx] = r
    A.insert(idx, round((val - r), 3))

In [329]:
def addfalsepositive(listv, count):
    if(count == 0):
        return(listv, count)
    err_cites, sel_val = generateRandomList(listv, count)
    for val in reversed(sel_val):
        insertVal(listv, val)
    return(listv, err_cites, count)

------------- With BioNano deletion error rate (V4) --------------


In [89]:
# standard deviation with Bionano data
def signmaS(len):
    singma = (0.2*0.2) + len * 0.1 * (-0.1) + len*len*0.04*0.04
    return(singma)

handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = [len(seq) for seq in Bio.Restriction.XhoI.catalyse(ref_seq)]

def subseq(seq, limits):
    x,y = limits
    if x < y : return seq[x:y]
    return seq[x:len(seq)] + seq[0:y]

molecules = []
for i in range(700):
    breaks = []
    num_breaks = 40 #random.randint(20,50)
    for j in range(num_breaks):
        breaks.append(random.randint(0,len(ref_seq) - 1))
    breaks.sort()        
    pairs = [(x,y) for x,y in zip(breaks[:-1], breaks[1:])] + [(breaks[-1], breaks[0])]
    molecules += [[subseq(ref_seq, pair), pair] for pair in pairs]

In [90]:
def generateRandomList(listv, count):
    randomList = []
    randomListIndex = []
    if(count >= len(listv)):
        return(randomListIndex, randomList)
    if(count <= 0):
        return(randomListIndex, randomList)
    if(len(listv) == 0):
        return(randomListIndex, randomList)
    
    index = range(0, len(listv) - 1)
    for c in range(0, count):
        i = random.choice(index)
        randomList.append(listv[i])
        randomListIndex.append(i)
        index.remove(i)
    
    randomListIndex, randomList = zip(*sorted(zip(randomListIndex,randomList)))
    return(randomListIndex, randomList)

def deleteRandomly(listv, count):
    deleted = generateRandomList(listv, count)
    deleted_index = list(deleted[0])
    deleted_values = list(deleted[1])
    for i in reversed(deleted_index):        
        listv[i+1] = listv[i+1] + listv[i]
        listv.remove(listv[i])
    return(listv, deleted_index, deleted_values)

def insertVal(A, val):
    idx = A.index(val)        
    r = round(random.uniform(0.0, val), 3)
    A[idx] = r
    A.insert(idx, round((val - r), 3))
        
def addfalsepositive(listv, count):
    if(count == 0):
        return(listv, count)
    err_cites, sel_val = generateRandomList(listv, count)
    for val in reversed(sel_val):
        insertVal(listv, val)
    return(listv, err_cites, count)

def sim(mol):
    "Return a simulated set of fragment sizes in bp for a Bio.SeqIO seq"    
    sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(mol)]  
    sites.extend([biosite - 1 for biosite in Bio.Restriction.NheI.search(mol)])
    sites.extend([biosite - 1 for biosite in Bio.Restriction.EagI.search(mol)])
    sites.sort()
    
    allsites = [0] + sites + [len(mol)]
    sizes = [y - x for x,y in zip(allsites[:-1], allsites[1:])]
    sizes = [size/1000.0 for size in sizes] # convert to kb
    sizes = [size for size in sizes if size > 1.1]
    if(bool(random.getrandbits(1)) == True):
        sizes = sizes[::-1]
    sizes = sizes[1:-1]
    e_f_sizes = list(sizes);
    sizes, dindex, dsites = deleteRandomly( sizes, int(round(len(sites)/6.66)))    
    sizes, iindex, ins_err = addfalsepositive(sizes, int(round(len(mol)/200000.0)))
    sizes = [size + random.gauss(0, math.sqrt(signmaS(size))) for size in sizes if size > 0.5]
    sizes = [size for size in sizes if size > 0.5]    
    return (sizes, e_f_sizes, dindex, ins_err, iindex)

In [91]:
filename = "sim_single_molecule_100"
m = open(filename+"_newDel","w")
mwe = open(filename+"_efree","w")
efile  = open(filename+"_elocations","w")

start = 0
end = 0
length_of_original = []
length_of_errored = []
accum = 0
tot_ins_err = 0
tot_del_err = 0
for i, mol in enumerate((molecule for molecule in molecules if len(molecule[0]) > 250000)):
    simulated = sim(mol[0])      
    lens = [str(round(frag,3)) for frag in simulated[0]]
    if len(lens) < 10: continue
    map_name = "map_" + str(i) + "\n"
    
    lens_e_free = [str(round(frag,3)) for frag in simulated[1]]
    length_of_errored.append(len(lens))
    length_of_original.append(len(lens_e_free))
    #if i == 100: break
    tot_ins_err += simulated[3]
    tot_del_err += len(simulated[2])
    
    if(start == 0 and end ==0):
        start = mol[1][0]
        end = mol[1][1]
    
#    elif(start <= mol[1][0] and mol[1][0] >= end):
#    if((start <= mol[1][0] and mol[1][0] <= end) or (start <= mol[1][1] and mol[1][1] <= end) or (start >= mol[1][0] and mol[1][1] >= end)):
    m.write(map_name)
    m.write("\tXhoI\tXhoI\t" + "\t".join(lens) + "\n")
    m.write("\n")

    mwe.write(map_name)
    mwe.write("\tXhoI\tXhoI\t" + "\t".join(lens_e_free) + "\n")
    mwe.write("\n")

    efile.write(','.join([str(s) for s in simulated[2]]) + ' : ' + ','.join([str(s) for s in simulated[4]]))
    efile.write("\n")

m.close()
mwe.close()
efile.close()
print("Total insertion errors: " + str(tot_ins_err))
print("Total deletion errors : "+ str(tot_del_err))


Total insertion errors: 5674
Total deletion errors : 23271

In [92]:
!cp /s/oak/b/nobackup/darshanw/ipython-notebooks/sim_single_molecule_100_newDel /s/oak/b/nobackup/darshanw/COmap/test/

In [93]:
!cp /s/oak/b/nobackup/darshanw/ipython-notebooks/sim_single_molecule_100_efree /s/oak/b/nobackup/darshanw/COmap/test/

In [94]:
!cp /s/oak/b/nobackup/darshanw/ipython-notebooks/sim_single_molecule_100_elocations /s/oak/b/nobackup/darshanw/COmap/test/

In [ ]:


In [ ]:

============Experiments==================


In [20]:
handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = [len(seq) for seq in Bio.Restriction.XhoI.catalyse(ref_seq)]
print(frag_sizes)


[47077, 55381, 40525, 2335, 27290, 18043, 7616, 1806, 5554, 23391, 12, 23605, 28270, 24142, 27238, 8511, 6776, 24921, 1462, 5108, 24335, 11058, 27070, 43326, 49950, 51518, 16481, 37904, 4946, 35071, 17610, 58586, 28871, 14231, 71874, 55145, 2763, 37516, 2107, 3698, 2894, 16741, 60041, 36938, 19058, 4897, 32535, 3642, 4556, 17626, 3007, 12754, 28213, 730, 26300, 33522, 9614, 178, 27279, 28887, 13417, 2786, 21812, 24018, 7009, 14835, 72120, 9739, 10360, 64508, 11884, 18627, 28582, 1557, 2506, 1913, 11641, 15440, 8697, 6170, 45312, 4218, 15062, 20234, 12854, 12396, 48964, 34111, 55638, 56159, 48165, 101, 2099, 13095, 111629, 66318, 116010, 32342, 14882, 79243, 504, 4995, 10312, 37236, 32650, 3417, 12928, 87122, 14466, 15855, 110769, 28703, 4878, 24503, 84549, 3166, 25931, 24461, 68447, 70083, 30152, 24183, 11068, 3284, 6952, 28481, 19404, 44525, 38583, 36019, 76733, 3374, 13436, 27244, 13969, 7883, 1620, 58762, 3167, 44549, 24700, 44498, 39605, 8075, 4687, 32319, 18488, 17260, 10482, 42730, 37485, 6481, 29060, 4547, 14475, 60323, 13128, 498, 12437, 19199, 8085, 17715, 91796, 162, 24820, 64611, 42963, 249, 28217, 20233, 14977, 21401, 40922, 25728, 15415, 20810, 45383, 35631, 42729]

In [27]:
from __future__ import division
from Bio import SeqIO
from Bio.Restriction import *
import numpy
multi = (XhoI,NheI,EagI)

handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    print(record.id + "\n")       
    print("\tXhoI\tNheI")
    L=[0]
    L.append (len(record)/1000)
    for enz in range(len(multi)):
        coords = multi[enz].search(record.seq)        
        for site in range(len(coords)):            
            L.append(coords[site]/1000)                                    
    L.sort()
        
    for i in range(1, len(L)):
        frags=[str(L[i]-L[i-1])]
        
        print( "\t"+ "\t".join(frags))
    print("\n")        
    print("\n")


ENA|U00096|U00096.2

	XhoI	NheI
	17.75
	7.404
	19.811
	2.113
	3.244
	18.526
	14.427
	3.825
	15.359
	4.25
	19.549
	1.349
	11.122
	4.255
	2.335
	10.299
	11.281
	5.71
	0.26
	1.272
	16.511
	2.447
	5.169
	1.806
	5.554
	19.036
	0.244
	2.756
	1.1
	0.255
	0.012
	10.987
	8.142
	0.57
	3.906
	12.169
	3.862
	12.239
	1.309
	1.62
	2.478
	1.686
	8.984
	8.065
	0.124
	1.068
	0.141
	1.002
	13.349
	11.554
	8.511
	6.776
	5.731
	18.655
	0.535
	1.462
	5.108
	18.31
	3.535
	2.49
	5.763
	5.295
	5.962
	8.346
	10.69
	2.072
	5.612
	16.235
	2.739
	18.74
	4.656
	3.125
	0.218
	14.479
	27.472
	40.897
	1.765
	4.322
	4.044
	0.49
	1.967
	14.514
	3.664
	13.07
	6.524
	9.148
	3.612
	1.886
	3.125
	1.821
	20.182
	1.916
	0.069
	3.254
	0.782
	8.868
	0.327
	11.398
	5.885
	5.826
	8.51
	18.15
	0.471
	11.599
	14.03
	17.494
	2.021
	9.356
	14.231
	14.004
	5.219
	10.978
	5.715
	35.958
	11.655
	3.433
	1.694
	15.842
	11.457
	2.234
	8.83
	2.763
	0.571
	2.127
	0.675
	9.958
	17.017
	2.836
	1.039
	3.293
	2.107
	3.698
	2.894
	9.824
	6.917
	6.982
	14.137
	7.232
	8.882
	22.808
	8.267
	6.946
	4.937
	16.788
	19.058
	4.897
	11.668
	20.867
	0.512
	3.13
	4.556
	1.255
	16.371
	3.007
	12.754
	10.541
	13.35
	4.322
	0.73
	22.255
	4.045
	2.332
	6.749
	10.633
	2.929
	0.131
	0.303
	2.205
	0.535
	0.535
	7.17
	9.614
	0.178
	2.937
	17.554
	3.21
	3.578
	5.334
	2.846
	5.581
	10.192
	4.934
	3.38
	0.996
	0.81
	8.231
	2.786
	21.812
	0.397
	1.733
	7.044
	12.612
	2.232
	7.009
	14.835
	42.569
	2.805
	20.041
	6.705
	3.912
	5.827
	10.36
	64.508
	2.205
	7.73
	1.949
	16.196
	2.431
	1.793
	1.567
	5.148
	0.078
	0.699
	5.753
	10.547
	2.997
	1.557
	2.506
	1.913
	6.927
	4.714
	15.44
	0.158
	5.059
	3.48
	0.948
	2.808
	2.414
	6.796
	10.229
	20.742
	7.545
	4.218
	15.062
	4.624
	15.61
	12.854
	9.399
	2.997
	7.59
	13.291
	28.083
	1.092
	10.659
	0.435
	21.925
	1.879
	16.38
	21.63
	8.406
	0.0839999999998
	7.259
	0.336
	16.109
	12.712
	4.501
	4.757
	6.051
	0.186
	4.19
	6.364
	0.953
	16.685
	7.342
	15.617
	2.791
	2.595
	1.054
	2.081
	0.101
	2.099
	3.376
	9.719
	0.667
	27.96
	1.066
	17.107
	4.472
	55.326
	5.031
	28.395
	2.439
	1.815
	20.116
	3.427
	0.125
	3.145
	2.693
	4.163
	17.021
	18.571
	4.501
	5.106
	2.914
	0.66
	9.989
	5.454
	8.253
	19.577
	2.768
	13.063
	2.919
	5.214
	8.116
	1.641
	16.504
	6.081
	14.882
	4.246
	3.341
	5.63
	3.752
	1.385
	15.871
	14.161
	23.498
	7.359
	0.504
	4.995
	4.184
	5.412
	0.716
	14.246
	1.543
	14.923
	6.524
	9.2
	17.169
	6.281
	3.417
	0.524
	12.404
	16.684
	4.829
	1.186
	5.432
	0.422
	22.981
	3.489
	3.047
	17.791
	6.385
	4.876
	3.738
	10.728
	15.855
	22.069
	2.7
	0.956
	1.099
	2.741
	0.244
	15.794
	10.747
	0.147
	1.709
	11.725
	2.315
	1.577
	0.678
	10.024
	26.244
	2.367
	26.336
	1.72
	3.158
	22.531
	1.972
	3.141
	13.189
	5.545
	10.676
	4.287
	0.429
	12.842
	16.28
	18.16
	3.166
	25.931
	24.461
	7.512
	16.698
	0.995
	4.065
	4.194
	4.785
	4.392
	14.073
	1.611
	10.122
	10.449
	3.714
	5.142
	7.646
	12.585
	0.296
	4.777
	14.841
	10.633
	30.152
	15.036
	9.147
	11.068
	0.755
	2.529
	6.952
	23.186
	3.702
	1.593
	8.996
	7.897
	2.511
	9.75
	34.775
	1.273
	3.874
	1.145
	3.539
	1.529
	26.691
	0.532
	19.71
	3.8
	12.509
	2.91
	0.492
	1.422
	10.641
	13.936
	5.782
	1.099
	2.747
	0.244
	12.656
	2.249
	1.742
	2.073
	2.832
	2.67
	2.223
	4.835
	6.18
	3.374
	1.486
	0.189
	1.484
	10.277
	0.495
	26.749
	6.125
	7.844
	7.883
	1.62
	11.264
	8.246
	2.016
	3.384
	11.436
	22.416
	3.167
	6.602
	18.87
	0.953
	0.471
	17.653
	0.739
	6.623
	2.669
	13.543
	1.126
	6.061
	3.626
	1.201
	16.831
	5.617
	0.804
	10.358
	30.17
	1.567
	7.868
	8.075
	4.687
	2.349
	0.953
	0.471
	3.248
	7.343
	0.935
	0.225
	9.061
	3.568
	1.527
	2.639
	4.489
	4.24
	7.074
	2.685
	7.235
	9.38
	0.645
	0.673
	1.715
	5.49
	2.604
	15.998
	1.591
	8.054
	15.533
	1.554
	2.428
	28.113
	3.66
	3.284
	6.481
	11.846
	1.392
	1.028
	2.296
	0.244
	2.664
	1.099
	0.508
	7.983
	4.547
	9.154
	0.157
	5.164
	8.412
	1.569
	21.779
	4.075
	8.701
	0.587
	8.88
	6.32
	1.88
	0.244
	2.757
	8.247
	0.498
	7.248
	5.189
	19.199
	8.085
	8.058
	3.256
	1.578
	4.823
	21.447
	29.253
	1.432
	9.814
	0.244000000001
	2.75
	6.349
	0.189
	20.318
	0.162
	8.583
	2.893
	0.244
	2.664
	10.436
	5.77
	5.37
	1.038
	18.829
	12.722
	9.717
	6.143
	1.898
	3.124
	16.943
	4.335
	4.098
	17.587
	0.249
	0.236000000001
	19.039
	2.024
	6.918
	20.233
	2.449
	12.528
	11.283
	5.237
	4.881
	8.82
	0.661
	31.441
	18.835
	3.155
	3.738
	1.834
	13.581
	4.672
	7.32
	1.761
	3.697
	3.36
	5.885
	9.01
	0.0909999999994
	0.732
	29.665
	3.976
	0.157999999999
	12.675
	7.196
	8.151
	3.475
	2.898
	4.313
	0.121
	0.115
	33.362
	0.914
	1.005





In [61]:
sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(molecules[3][0])] 
sites


Out[61]:
[8782,
 73290,
 85174,
 103801,
 132383,
 133940,
 136446,
 138359,
 150000,
 165440,
 174137,
 180307,
 225619,
 229837,
 244899,
 265133,
 277987,
 290383,
 339347,
 373458,
 429096,
 485255,
 533420,
 533521,
 535620,
 548715,
 660344,
 726662]

In [64]:
sites = [biosite - 1 for biosite in Bio.Restriction.XhoI.search(molecules[3][0])] 
sites = []
sites.extend([biosite - 1 for biosite in Bio.Restriction.NheI.search(molecules[3][0])])
sites.sort()
sites


Out[64]:
[107161,
 113086,
 129386,
 145286,
 170657,
 197332,
 287386,
 375337,
 445541,
 467511,
 530285,
 538996,
 577342,
 595515,
 655313,
 688739,
 691178,
 713109,
 716661,
 719806,
 743683]

In [26]:
handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = []
for seq in Bio.Restriction.XhoI.catalyse(ref_seq):    
    for seq1 in Bio.Restriction.NheI.catalyse(seq):
        for seq2 in Bio.Restriction.EagI.catalyse(seq1):
            frag_sizes.append(len(seq2))
print(frag_sizes)


[17749, 7404, 19811, 2113, 3244, 18526, 14427, 3825, 15359, 4250, 19549, 1349, 11122, 4255, 2335, 10299, 11281, 5710, 260, 1272, 16511, 2447, 5169, 1806, 5554, 19036, 244, 2756, 1100, 255, 12, 10987, 8142, 570, 3906, 12169, 3862, 12239, 1309, 1620, 2478, 1686, 8984, 8065, 124, 1068, 141, 1002, 13349, 11554, 8511, 6776, 5731, 18655, 535, 1462, 5108, 18310, 3535, 2490, 5763, 5295, 5962, 8346, 10690, 2072, 5612, 16235, 2739, 18740, 4656, 3125, 218, 14479, 27472, 40897, 1765, 4322, 4044, 490, 1967, 14514, 3664, 13070, 6524, 9148, 3612, 1886, 3125, 1821, 20182, 1916, 69, 3254, 782, 8868, 327, 11398, 5885, 5826, 8510, 18150, 471, 11599, 14030, 17494, 2021, 9356, 14231, 14004, 5219, 10978, 5715, 35958, 11655, 3433, 1694, 15842, 11457, 2234, 8830, 2763, 571, 2127, 675, 9958, 17017, 2836, 1039, 3293, 2107, 3698, 2894, 9824, 6917, 6982, 14137, 7232, 8882, 22808, 8267, 6946, 4937, 16788, 19058, 4897, 11668, 20867, 512, 3130, 4556, 1255, 16371, 3007, 12754, 10541, 13350, 4322, 730, 22255, 4045, 2332, 6749, 10633, 2929, 131, 303, 2205, 535, 535, 7170, 9614, 178, 2937, 17554, 3210, 3578, 5334, 2846, 5581, 10192, 4934, 3380, 996, 810, 8231, 2786, 21812, 397, 1733, 7044, 12612, 2232, 7009, 14835, 42569, 2805, 20041, 6705, 3912, 5827, 10360, 64508, 2205, 7730, 1949, 16196, 2431, 1793, 1567, 5148, 78, 699, 5753, 10547, 2997, 1557, 2506, 1913, 6927, 4714, 15440, 158, 5059, 3480, 948, 2808, 2414, 6796, 10229, 20742, 7545, 4218, 15062, 4624, 15610, 12854, 9399, 2997, 7590, 13291, 28083, 1092, 10659, 435, 21925, 1879, 16380, 21630, 8406, 84, 7259, 336, 16109, 12712, 4501, 4757, 6051, 186, 4190, 6364, 953, 16685, 7342, 15617, 2791, 2595, 1054, 2081, 101, 2099, 3376, 9719, 667, 27960, 1066, 17107, 4472, 55326, 5031, 28395, 2439, 1815, 20116, 3427, 125, 3145, 2693, 4163, 17021, 18571, 4501, 5106, 2914, 660, 9989, 5454, 8253, 19577, 2768, 13063, 2919, 5214, 8116, 1641, 16504, 6081, 14882, 4246, 3341, 5630, 3752, 1385, 15871, 14161, 23498, 7359, 504, 4995, 4184, 5412, 716, 14246, 1543, 14923, 6524, 9200, 17169, 6281, 3417, 524, 12404, 16684, 4829, 1186, 5432, 422, 22981, 3489, 3047, 17791, 6385, 4876, 3738, 10728, 15855, 22069, 2700, 956, 1099, 2741, 244, 15794, 10747, 147, 1709, 11725, 2315, 1577, 678, 10024, 26244, 2367, 26336, 1720, 3158, 22531, 1972, 3141, 13189, 5545, 10676, 4287, 429, 12842, 16280, 18160, 3166, 25931, 24461, 7512, 16698, 995, 4065, 4194, 4785, 4392, 14073, 1611, 10122, 10449, 3714, 5142, 7646, 12585, 296, 4777, 14841, 10633, 30152, 15036, 9147, 11068, 755, 2529, 6952, 23186, 3702, 1593, 8996, 7897, 2511, 9750, 34775, 1273, 3874, 1145, 3539, 1529, 26691, 532, 19710, 3800, 12509, 2910, 492, 1422, 10641, 13936, 5782, 1099, 2747, 244, 12656, 2249, 1742, 2073, 2832, 2670, 2223, 4835, 6180, 3374, 1486, 189, 1484, 10277, 495, 26749, 6125, 7844, 7883, 1620, 11264, 8246, 2016, 3384, 11436, 22416, 3167, 6602, 18870, 953, 471, 17653, 739, 6623, 2669, 13543, 1126, 6061, 3626, 1201, 16831, 5617, 804, 10358, 30170, 1567, 7868, 8075, 4687, 2349, 953, 471, 3248, 7343, 935, 225, 9061, 3568, 1527, 2639, 4489, 4240, 7074, 2685, 7235, 9380, 645, 673, 1715, 5490, 2604, 15998, 1591, 8054, 15533, 1554, 2428, 28113, 3660, 3284, 6481, 11846, 1392, 1028, 2296, 244, 2664, 1099, 508, 7983, 4547, 9154, 157, 5164, 8412, 1569, 21779, 4075, 8701, 587, 8880, 6320, 1880, 244, 2757, 8247, 498, 7248, 5189, 19199, 8085, 8058, 3256, 1578, 4823, 21447, 29253, 1432, 9814, 244, 2750, 6349, 189, 20318, 162, 8583, 2893, 244, 2664, 10436, 5770, 5370, 1038, 18829, 12722, 9717, 6143, 1898, 3124, 16943, 4335, 4098, 17587, 249, 236, 19039, 2024, 6918, 20233, 2449, 12528, 11283, 5237, 4881, 8820, 661, 31441, 18835, 3155, 3738, 1834, 13581, 4672, 7320, 1761, 3697, 3360, 5885, 9010, 91, 732, 29665, 3976, 158, 12675, 7196, 8151, 3475, 2898, 4313, 121, 115, 33362, 914, 1006]

In [30]:
handle = open("/s/fir/a/nobackup/data/ECOLI_Reference_Genome/ecoli.fa", "rU")
for record in Bio.SeqIO.parse(handle, "fasta"):
    ref_seq = record.seq
frag_sizes = []
for seq in Bio.Restriction.XhoI.catalyse(ref_seq):    
    for seq1 in Bio.Restriction.NheI.catalyse(seq):
        for seq2 in Bio.Restriction.EagI.catalyse(seq1):
            frag_sizes.append(len(seq2))
frag_sizes


Out[30]:
[17749,
 7404,
 19811,
 2113,
 3244,
 18526,
 14427,
 3825,
 15359,
 4250,
 19549,
 1349,
 11122,
 4255,
 2335,
 10299,
 11281,
 5710,
 260,
 1272,
 16511,
 2447,
 5169,
 1806,
 5554,
 19036,
 244,
 2756,
 1100,
 255,
 12,
 10987,
 8142,
 570,
 3906,
 12169,
 3862,
 12239,
 1309,
 1620,
 2478,
 1686,
 8984,
 8065,
 124,
 1068,
 141,
 1002,
 13349,
 11554,
 8511,
 6776,
 5731,
 18655,
 535,
 1462,
 5108,
 18310,
 3535,
 2490,
 5763,
 5295,
 5962,
 8346,
 10690,
 2072,
 5612,
 16235,
 2739,
 18740,
 4656,
 3125,
 218,
 14479,
 27472,
 40897,
 1765,
 4322,
 4044,
 490,
 1967,
 14514,
 3664,
 13070,
 6524,
 9148,
 3612,
 1886,
 3125,
 1821,
 20182,
 1916,
 69,
 3254,
 782,
 8868,
 327,
 11398,
 5885,
 5826,
 8510,
 18150,
 471,
 11599,
 14030,
 17494,
 2021,
 9356,
 14231,
 14004,
 5219,
 10978,
 5715,
 35958,
 11655,
 3433,
 1694,
 15842,
 11457,
 2234,
 8830,
 2763,
 571,
 2127,
 675,
 9958,
 17017,
 2836,
 1039,
 3293,
 2107,
 3698,
 2894,
 9824,
 6917,
 6982,
 14137,
 7232,
 8882,
 22808,
 8267,
 6946,
 4937,
 16788,
 19058,
 4897,
 11668,
 20867,
 512,
 3130,
 4556,
 1255,
 16371,
 3007,
 12754,
 10541,
 13350,
 4322,
 730,
 22255,
 4045,
 2332,
 6749,
 10633,
 2929,
 131,
 303,
 2205,
 535,
 535,
 7170,
 9614,
 178,
 2937,
 17554,
 3210,
 3578,
 5334,
 2846,
 5581,
 10192,
 4934,
 3380,
 996,
 810,
 8231,
 2786,
 21812,
 397,
 1733,
 7044,
 12612,
 2232,
 7009,
 14835,
 42569,
 2805,
 20041,
 6705,
 3912,
 5827,
 10360,
 64508,
 2205,
 7730,
 1949,
 16196,
 2431,
 1793,
 1567,
 5148,
 78,
 699,
 5753,
 10547,
 2997,
 1557,
 2506,
 1913,
 6927,
 4714,
 15440,
 158,
 5059,
 3480,
 948,
 2808,
 2414,
 6796,
 10229,
 20742,
 7545,
 4218,
 15062,
 4624,
 15610,
 12854,
 9399,
 2997,
 7590,
 13291,
 28083,
 1092,
 10659,
 435,
 21925,
 1879,
 16380,
 21630,
 8406,
 84,
 7259,
 336,
 16109,
 12712,
 4501,
 4757,
 6051,
 186,
 4190,
 6364,
 953,
 16685,
 7342,
 15617,
 2791,
 2595,
 1054,
 2081,
 101,
 2099,
 3376,
 9719,
 667,
 27960,
 1066,
 17107,
 4472,
 55326,
 5031,
 28395,
 2439,
 1815,
 20116,
 3427,
 125,
 3145,
 2693,
 4163,
 17021,
 18571,
 4501,
 5106,
 2914,
 660,
 9989,
 5454,
 8253,
 19577,
 2768,
 13063,
 2919,
 5214,
 8116,
 1641,
 16504,
 6081,
 14882,
 4246,
 3341,
 5630,
 3752,
 1385,
 15871,
 14161,
 23498,
 7359,
 504,
 4995,
 4184,
 5412,
 716,
 14246,
 1543,
 14923,
 6524,
 9200,
 17169,
 6281,
 3417,
 524,
 12404,
 16684,
 4829,
 1186,
 5432,
 422,
 22981,
 3489,
 3047,
 17791,
 6385,
 4876,
 3738,
 10728,
 15855,
 22069,
 2700,
 956,
 1099,
 2741,
 244,
 15794,
 10747,
 147,
 1709,
 11725,
 2315,
 1577,
 678,
 10024,
 26244,
 2367,
 26336,
 1720,
 3158,
 22531,
 1972,
 3141,
 13189,
 5545,
 10676,
 4287,
 429,
 12842,
 16280,
 18160,
 3166,
 25931,
 24461,
 7512,
 16698,
 995,
 4065,
 4194,
 4785,
 4392,
 14073,
 1611,
 10122,
 10449,
 3714,
 5142,
 7646,
 12585,
 296,
 4777,
 14841,
 10633,
 30152,
 15036,
 9147,
 11068,
 755,
 2529,
 6952,
 23186,
 3702,
 1593,
 8996,
 7897,
 2511,
 9750,
 34775,
 1273,
 3874,
 1145,
 3539,
 1529,
 26691,
 532,
 19710,
 3800,
 12509,
 2910,
 492,
 1422,
 10641,
 13936,
 5782,
 1099,
 2747,
 244,
 12656,
 2249,
 1742,
 2073,
 2832,
 2670,
 2223,
 4835,
 6180,
 3374,
 1486,
 189,
 1484,
 10277,
 495,
 26749,
 6125,
 7844,
 7883,
 1620,
 11264,
 8246,
 2016,
 3384,
 11436,
 22416,
 3167,
 6602,
 18870,
 953,
 471,
 17653,
 739,
 6623,
 2669,
 13543,
 1126,
 6061,
 3626,
 1201,
 16831,
 5617,
 804,
 10358,
 30170,
 1567,
 7868,
 8075,
 4687,
 2349,
 953,
 471,
 3248,
 7343,
 935,
 225,
 9061,
 3568,
 1527,
 2639,
 4489,
 4240,
 7074,
 2685,
 7235,
 9380,
 645,
 673,
 1715,
 5490,
 2604,
 15998,
 1591,
 8054,
 15533,
 1554,
 2428,
 28113,
 3660,
 3284,
 6481,
 11846,
 1392,
 1028,
 2296,
 244,
 2664,
 1099,
 508,
 7983,
 4547,
 9154,
 157,
 5164,
 8412,
 1569,
 21779,
 4075,
 8701,
 587,
 8880,
 6320,
 1880,
 244,
 2757,
 8247,
 498,
 7248,
 5189,
 19199,
 8085,
 8058,
 3256,
 1578,
 4823,
 21447,
 29253,
 1432,
 9814,
 244,
 2750,
 6349,
 189,
 20318,
 162,
 8583,
 2893,
 244,
 2664,
 10436,
 5770,
 5370,
 1038,
 18829,
 12722,
 9717,
 6143,
 1898,
 3124,
 16943,
 4335,
 4098,
 17587,
 249,
 236,
 19039,
 2024,
 6918,
 20233,
 2449,
 12528,
 11283,
 5237,
 4881,
 8820,
 661,
 31441,
 18835,
 3155,
 3738,
 1834,
 13581,
 4672,
 7320,
 1761,
 3697,
 3360,
 5885,
 9010,
 91,
 732,
 29665,
 3976,
 158,
 12675,
 7196,
 8151,
 3475,
 2898,
 4313,
 121,
 115,
 33362,
 914,
 1006]

In [32]:
1+2


Out[32]:
3

In [ ]: